////////////////////////////////////////////////////////////////////////////////
//////// PARLIAMENTARY ELECTIONS DATASET 
////////////////////////////////////////////////////////////////////////////////

// Flag variables of the final dataset
local flags "flag_wrong_sum_seats flag_coup flag_inconsequential flag_inconsequential_note flag_vacant_seats flag_vacant_seats_nb flag_appointed flag_appointed_nb flag_non_partisan"

// Location of each source database. Every local is named "<source>_data" so that
// a loop over the priority list can look up the path from the source name.
// -- raw inputs
local AC_L_data "$project_path/data/1_input/elections/AC_Leg/AC_L_database.dta"
local AED_L_data "$project_path/data/1_input/elections/AED_Leg/AED_L_database.dta"
local EED_L_data "$project_path/data/1_input/elections/EED_Leg/EED_L_database.dta"
local PDA_L_data "$project_path/data/1_input/elections/PDA_Leg/PDA_L_database.dta"
local Wiki_L_data "$project_path/data/1_input/elections/Wikipedia_Leg/Wiki_L_database.dta"
local IFES_L_data "$project_path/data/1_input/elections/IFES_Leg/IFES_L_database.dta"
local IPU_L_data "$project_path/data/1_input/elections/IPU_Leg/IPU_L_database.dta"
local Nohlen_data "$project_path/data/1_input/elections/Nohlen_Leg/Nohlen_L_database.dta"
// -- intermediary files
local MP_data "$project_path/data/2_intermediary/elections/Parliamentary elections/parl_elec_mp.dta"
local PARL_data "$project_path/data/2_intermediary/elections/Parliamentary elections/parl_elec_parlgov.dta"
local DPI_data "$project_path/data/2_intermediary/elections/Parliamentary elections/parl_elec_dpi.dta"
local GLOBAL_data "$project_path/data/2_intermediary/elections/Parliamentary elections/parl_elec_global.dta"

// Sources ordered from most to least preferred
local priority_list "PARL MP Nohlen DPI GLOBAL AC_L AED_L EED_L PDA_L IPU_L IFES_L Wiki_L"

////////////////////////////////////////////////////////////////////////////////
//////// a. Assigning a source to each election and merging flags
////////////////////////////////////////////////////////////////////////////////

// In addition to the elections found in V-Dem and in our other sources, three elections
// were found in CLEA and IDEA only. They are entered by hand (month unknown) and stored
// in a temporary file so they can be appended to the V-Dem list below.
* Ethiopia 1992 and Togo 1997 are in IDEA; Marshall Islands 2011 is in CLEA
clear
input str25 Country	Year	Month	str15 Type_Election
"Ethiopia"	1992	.	"Parliamentary"
"Togo"	1997	.	"Parliamentary"
"Marshall Islands"	2011	.	"Parliamentary"
end
tempfile additional_elections 
save `additional_elections'

// Start from the list of parliamentary elections in V-Dem and add the three hand-entered ones
use "$project_path/data/2_intermediary/elections/Parliamentary elections/parl_elec_vdem", clear
append using `additional_elections'

// Creating empty flag variables; they are filled from the sources further down.
// The note is the only string flag, all the others are numeric.
generate flag_inconsequential = .
generate flag_inconsequential_note = ""
foreach new_flag in flag_coup flag_appointed flag_appointed_nb flag_vacant_seats flag_vacant_seats_nb flag_non_partisan {
	generate `new_flag' = .
}

// For every source: load it, mark the elections for which it reports at least one party,
// suffix each of its flags with the source name, and merge it with the running election list
foreach src in `priority_list' {
	// Park the running list on disk while the source file is in memory
	tempfile running_list
	save `running_list'

	use "``src'_data'", clear
	generate available_`src' = 1 if Party_1 != ""
	keep Country Year Month Type_Election available_`src' flag_*
	foreach flag_var of varlist flag_* {
		rename `flag_var' `flag_var'_`src'
	}

	// No keep() restriction: elections present on only one side of the merge are retained
	merge 1:1 Country Year Month Type_Election using `running_list', nogenerate
}

// Filling each flag with the value of the first source (in priority order) that provides one,
// then discarding the source-specific copy
local numeric_flags "flag_coup flag_inconsequential flag_vacant_seats flag_vacant_seats_nb flag_appointed flag_appointed_nb flag_non_partisan"
foreach src in `priority_list' {
	foreach f of local numeric_flags {
		replace `f' = `f'_`src' if `f' == .
		drop `f'_`src'
	}
	// Same logic for the string note, whose "empty" value is "" rather than .
	replace flag_inconsequential_note = flag_inconsequential_note_`src' if flag_inconsequential_note == ""
	drop flag_inconsequential_note_`src'
}

// Flag for elections that are not the last one held in their country and year.
// Within each Country-Year group, rows are ordered by Month (a missing Month sorts last),
// and every row except the final one (_n != _N) gets the flag.
// Using _n/_N directly avoids helper variables and the wildcard "drop temp*",
// which would also have removed any unrelated variable whose name starts with "temp".
// NOTE(review): the grouping ignores Type_Election although the variable label says
// "of type in year" -- confirm this is intended.
sort Country Year Month
by Country Year : gen flag_not_last = 1 if _n != _N
// Assigning a source to each election. Sources are visited in priority order and
// "best" records what has been found so far for the election:
//   0 = no source yet
//   1 = a source whose seat sum is not flagged as wrong
//   2 = only a source whose seat sum is flagged as wrong
// A flagged source is used only if nothing else is available, and is replaced by the
// first non-flagged source that comes later in the list.
gen Source = ""
gen best = 0
foreach src in `priority_list' {
	local consistent "available_`src'==1 & flag_wrong_sum_seats_`src'!=1"
	local inconsistent "available_`src'==1 & flag_wrong_sum_seats_`src'==1"

	replace Source = "`src'" if `consistent' & (best==0 | best==2)
	replace Source = "`src'" if `inconsistent' & best==0
	replace best = 1 if `consistent'
	replace best = 2 if `inconsistent' & best==0

	drop flag_wrong_sum_seats_`src' available_`src'
}

// Elections for which only a flagged source was found inherit the flag
gen flag_wrong_sum_seats = 1 if best==2
drop best
order Country Year Month Type_Election Source

// Manual overrides: elections for which the source highest in the priority list has inaccurate or incomplete data

// Algeria 1991: the army staged a coup after a landslide victory of the opposition (FIS)
replace Source = "Wiki_L" if Country == "Algeria" & Year == 1991
// Australia 2010: PARL incorporates a member of Western Australia's National Party into the National Party.
// The latter was part of the "Coalition" but this "independent" member was not, so we rely on Wikipedia, which captures the nuance.
replace Source = "Wiki_L" if Country == "Australia" & Year == 2010
// Brazil 1958: Nohlen reports many "Others" seats, which the Portuguese Wikipedia page allocates to individual parties. We use data from there.
replace Source = "Wiki_L" if Country == "Brazil" & Year == 1958
// Brazil 1962: Nohlen reports many "Others" seats, which the Portuguese Wikipedia page allocates to individual parties. We use data from there.
replace Source = "Wiki_L" if Country == "Brazil" & Year == 1962
// Chile 1993: IPU_L reports "Independents" for each coalition. Use Wikipedia and change names for affiliation.
replace Source = "Wiki_L" if Country == "Chile" & Year == 1993
// Colombia 2002: some parties have duplicate names in Nohlen
replace Source = "Wiki_L" if Country == "Colombia" & Year == 2002
// Egypt 2005: DPI does not give results for independents, especially those affiliated with the Muslim Brotherhood
replace Source = "Wiki_L" if Country == "Egypt" & Year == 2005
// Egypt 2015: DPI does not give results for independents
replace Source = "Wiki_L" if Country == "Egypt" & Year == 2015
// Gabon 1957: Nohlen has "Others" as first party
replace Source = "Wiki_L" if Country == "Gabon" & Year == 1957
// Haiti 2000: Nohlen lists members of the Lavalas Family Party as independents
replace Source = "IPU_L" if Country == "Haiti" & Year == 2000
// India 1977, 1998, 1999, 2004, 2009, 2014, 2019: too many parties are clustered in "others", which makes coalition coding difficult
replace Source = "Wiki_L" if Country == "India" & inlist(Year, 1977, 1998, 1999, 2004, 2009, 2014, 2019)
// Ivory Coast 2011: parties missing in the original source
replace Source = "Wiki_L" if Country == "Ivory Coast" & Year == 2011
// Kenya 1961: parties missing in the original source
replace Source = "Wiki_L" if Country == "Kenya" & Year == 1961
// Kiribati 2003: incomplete results in AC
replace Source = "Wiki_L" if Country == "Kiribati" & Year == 2003
// Iceland until 1991: PARLGOV merges the two chambers instead of showing results for the lower chamber only. We use Nohlen instead.
replace Source = "Nohlen" if Country == "Iceland" & Year <= 1991
// Laos 1958: the election was used to add 21 seats to the National Assembly. Nohlen gives only the results of this by-election,
// whereas we have manually computed the total composition of the Assembly in Wiki_L.
replace Source = "Wiki_L" if Country == "Laos" & Year == 1958
// Libya 2012: DPI results are wrong as they include only "Appointees"
replace Source = "AC_L" if Country == "Libya" & Year == 2012
// Mexico 2000: GLOBAL has only the seats elected through the FPTP system (the 200 proportionally elected seats are missing)
replace Source = "Wiki_L" if Country == "Mexico" & Year == 2000
// Mozambique 1986: DPI has inconsistent results
replace Source = "Wiki_L" if Country == "Mozambique" & Year == 1986
// Nigeria 1964: Nohlen includes results for both the Northern People's Congress and the alliance that this party led (Nigerian National Alliance)
replace Source = "AED_L" if Country == "Nigeria" & Year == 1964
// P. N. Guinea 1987: the default source is Nohlen, but it gives the allocation of seats after independents affiliated with parties
replace Source = "Wiki_L" if Country == "P. N. Guinea" & Year == 1987
// Peru 2016: DPI reports fewer parties than Wiki (Peru Popular Action (5) and Popular Alliance (5) are missing)
replace Source = "Wiki_L" if Country == "Peru" & Year == 2016
// Russia 1995
replace Source = "Wiki_L" if Country == "Russia" & Year == 1995
// Suriname 1987: our extraction of Nohlen does not have vote shares, so we use Wikipedia (which uses Nohlen as a source) for this election
replace Source = "Wiki_L" if Country == "Suriname" & Year == 1987
// Uganda 1961: inconsistent results in Nohlen
replace Source = "AED_L" if Country == "Uganda" & Year == 1961
// Ukraine 2002: MP has only the nationally elected seats (the 225 seats elected in constituencies are missing)
replace Source = "Nohlen" if Country == "Ukraine" & Year == 2002
// Uganda 2001: DPI has inconsistent results, replaced by IPU for this year
replace Source = "IPU_L" if Country == "Uganda" & Year == 2001
// USA 1992, 1996: DPI has wrong results for these two years, so we replace them with IPU_L
replace Source = "IPU_L" if Country == "USA" & inlist(Year, 1992, 1996)
// Yemen 1982: DPI is an unreliable source, so the election is left without a source
replace Source = "" if Country == "Yemen" & Year == 1982
// Zimbabwe 1979: AED does not include the White roll
replace Source = "Wiki_L" if Country == "Zimbabwe" & Year == 1979

////////////////////////////////////////////////////////////////////////////////
//////// b. Gathering election results
////////////////////////////////////////////////////////////////////////////////

// Gathering the results of each election from the source assigned to it

// Snapshot of the election list with its assigned sources and flags
tempfile selected_sources
save `selected_sources'

// One temporary file per source, holding the results of the elections assigned to that source.
// The merge asserts that every such election is found in the source file.
foreach src in `priority_list' {
	use `selected_sources', clear
	keep if Source == "`src'"
	merge 1:1 Country Year Month Type_Election using "``src'_data'", assert(using match) keep(match) nogenerate
	drop flag_*
	tempfile results_`src'
	save `results_`src''
}

// Emptying the dataset in memory (its variables are kept) and stacking all per-source results
drop if Country != ""
foreach src in `priority_list' {
	append using `results_`src''
}
tempfile results
save `results'

// Attaching the results to the full election list; elections without a source keep empty results
use `selected_sources', clear
merge 1:1 Country Year Month Type_Election using `results', assert(master match) nogenerate

// Adding election dates (elections without a known date are kept)
merge 1:1 Country Year Month Type_Election using "$project_path/data/2_intermediary/elections/Election dates/election_dates.dta", keep(master match) nogenerate

order Country Year Month Type_Election Date Source, first
order flag_*, last

////////////////////////////////////////////////////////////////////////////////
//////// c. Grouping parties belonging to the same coalition
////////////////////////////////////////////////////////////////////////////////

// Setting aside everything that is not an election result (type, date and flags);
// these variables are merged back once the coalitions have been grouped
tempfile other_variables
preserve
keep Country Year Month Type_Election Date Source flag_*
save `other_variables'
restore
drop Date flag_*

// Reshaping the results to one row per election and party; initial_rank is the
// position the party had in the wide data
reshape long Party_ Seats_ Seat_Share_, i(Country Year Month Type_Election Source Total_Seats) j(initial_rank)
drop if missing(Party_)

// Rebuilding Party_ as a newly generated variable
// NOTE(review): presumably done to reset its storage type/format after the reshape -- confirm
generate Party = Party_
drop Party_
rename Party Party_

order Party_ Seats_ Seat_Share_, last
tempfile reshaped_results
save `reshaped_results'

// Loading the list of coalitions (one row per coalition, member parties in party1, party2, ...)
import excel "$project_path/data/1_input/elections/Coalitions/coalitions.xlsx", sheet("Sheet1") firstrow clear

// Building the "/"-separated list of members: parties are appended in column order
// until the first empty party column is met
// NOTE(review): the loop assumes the sheet has exactly the columns party1 to party24
generate member_list = party1
generate byte list_complete = 0
forvalues k = 2/24 {
	replace list_complete = 1 if party`k' == ""
	replace member_list = member_list + "/" + party`k' if !list_complete
}

// Named coalitions become "name [members]"; unnamed ones are called by their member list
replace coalition_name = cond(coalition_name == "", member_list, coalition_name + " [" + member_list + "]")
drop member_list list_complete

// Reshaping to one row per coalition and member party
reshape long party, i(Country Year Month Source coalition_name) j(number)
drop number
drop if missing(party)
rename party Party_
format coalition_name Party_ %25s

// Attaching the coalition of each party to the results; the merge asserts that
// every party listed in a coalition is found in the results
merge 1:1 Country Year Month Source Party_ using `reshaped_results', assert(using match) nogenerate

// Parties outside any coalition form a group of their own
replace coalition_name = Party_ if coalition_name == ""

// Summing seats and seat shares within each coalition. The counts of non-missing
// values are used to put back a missing value where no member had any data,
// since the sum of missing values alone would otherwise be reported as 0.
collapse (sum) Seats_ Seat_Share_ (min) initial_rank (count) n_Seats_=Seats_ n_Seat_Share_=Seat_Share_, by(Country Year Month Source Total_Seats coalition_name)
foreach stub in Seats_ Seat_Share_ {
	replace `stub' = . if n_`stub' == 0
	drop n_`stub'
}

// The coalition (or lone party) name now plays the role of the party name
format coalition_name %25s
rename coalition_name Party_

// Sorting parties within each election by descending seat share, then descending seats;
// remaining ties are broken by the rank the party had in its source, then by name
gsort Country Year Month -Seat_Share_ -Seats_ initial_rank Party_
drop initial_rank

// Reshaping back to one row per election, with parties numbered by their new rank
// NOTE(review): bysort sorts on Country Year Month again; the ranking relies on the
// within-election order produced by gsort being kept at this step -- confirm
bysort Country Year Month : gen rank = _n
reshape wide Party_ Seats_ Seat_Share_, i(Country Year Month Source) j(rank)
compress

// Merging back the variables that were set aside before grouping (election type, date, flags)
merge 1:1 Country Year Month Source using `other_variables', nogen

// In some cases, after regrouping parties belonging to the same coalition, rounding issues make the largest party's seat share almost, but not strictly, equal to 100% (e.g. 100.0000000001%). We adjust these cases here.
replace Seat_Share_1 = 100 if abs(Seat_Share_1-100)<0.001

// Re-checking the seat totals now that coalitions have been grouped
// seats_parties: seats won by all parties; seats_all: the same plus vacant and appointed seats
egen seats_parties = rowtotal(Seats_*)
egen seats_all = rowtotal(seats_parties flag_vacant_seats_nb flag_appointed_nb)

// Flag when party seats do not add up to the total ...
replace flag_wrong_sum_seats = 1 if seats_parties != Total_Seats
// ... clear the flag when they do once vacant and appointed seats are counted ...
replace flag_wrong_sum_seats = . if seats_all == Total_Seats
// ... and always flag elections whose total number of seats is unknown
replace flag_wrong_sum_seats = 1 if Total_Seats == .

drop seats_parties seats_all

// Labeling variables
label variable Country "Country in which election took place"
label variable Year "Year of the election"
label variable Month "Month of the election"
label variable Type_Election "Election type"
label variable flag_wrong_sum_seats "Sum of elected seats in parliament is inconsistent with the total number of seats"
label variable flag_inconsequential "Flags cancelled or inconsequential elections"
label variable flag_inconsequential_note "Why flag_inconsequential is equal to 1"
label variable flag_not_last "Not last election of type in year"
label variable flag_coup "Flags elections that were shortly followed by a coup or revolution"
label variable flag_vacant_seats "Flags elections which left vacant seats in parliament"
label variable flag_vacant_seats_nb "Number of vacant seats in parliament"
label variable flag_appointed "Flags elections in which some seats were not elected directly"
label variable flag_appointed_nb "Number of seats for which MPs were not elected directly"
label variable flag_non_partisan "Flags elections with no parties"
// NOTE(review): flag_constituent is not created anywhere in this script; it is expected
// to come from one of the input datasets -- confirm
label variable flag_constituent "Flags constituent assembly"
label variable Total_Seats "Total number of seats"

// Labeling the party variables. The number of party slots depends on the data, so the
// loop runs over the Party_* variables actually present instead of a fixed 1/74 range
// (which fails when fewer slots exist and leaves any slot above 74 unlabeled).
foreach party_var of varlist Party_* {
	// Rank suffix = everything after the 6-character "Party_" prefix
	local i = substr("`party_var'", 7, .)
	label variable Party_`i' "Party or coalition n°`i'"
	label variable Seats_`i' "Seats won by party n°`i'"
	label variable Seat_Share_`i' "Seat share of party n°`i'"
}

// Display formats of the string variables
format Country %30s
format Type_Election flag_inconsequential_note %15s
format Party_* %25s

// Month names as value labels
label define month_label ///
	1 "January" 2 "February" 3 "March" 4 "April" 5 "May" 6 "June" ///
	7 "July" 8 "August" 9 "September" 10 "October" 11 "November" 12 "December", replace
label values Month month_label

// Identifiers first, flags last (in the order of the flag list declared at the top of the file)
order Country Year Month Type_Election Date Source Total_Seats, first
order `flags', last

// Saving the cleaned dataset
compress
label data "Parliamentary elections database -- Marx, Pons, and Rollet (2024)"
save "$project_path/data/3_cleaned/parliamentary_elections", replace
